import requests
from lxml import etree
import pandas as pd
import numpy as np

# Request headers sent with the page fetch.
# NOTE(review): "Geko" looks like a typo for "Gecko"; the string is left
# unchanged here because it is sent to the server as-is -- confirm and fix.
data_headers = {'User-Agent': 'Mozilla/5.0(Windows;U;Windows NT6.1;en-US;rv:1.9.1.6) Geko/20091201 Firefox/3.5.6'}
url = 'http://piaofang.maoyan.com/box-office?ver=normal'

# Seconds to wait for the server before giving up on the request.
REQUEST_TIMEOUT = 10

# Output column -> XPath selecting the text nodes that fill that column.
# Dict order is the column order of the resulting table.
FIELD_XPATHS = {
    # movie name
    '电影名': '//div[@class="name-wrap"]/p[@class="movie-name"]/text()',
    # current total box office
    '票房': '//div[@class="name-wrap"]/p[@class="web-info"]/span[@class="sumBox"]/text()',
    # days since release
    '上映时间': '//div[@class="name-wrap"]/p[@class="web-info"]/span[@class="releaseInfo"]/text()',
    # combined box office
    '综合票房': '//td[@class="tbody-col"]/div[@class="boxDesc-wrap red-color"]/text()',
}


def fetch_page(page_url=url, headers=data_headers, timeout=REQUEST_TIMEOUT):
    """Download *page_url* and return its body decoded as UTF-8.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so
            an error page is never parsed as if it were box-office data.
        requests.RequestException: on connection errors or timeouts.
    """
    response = requests.get(page_url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.content.decode('utf-8')


def parse_box_office(html):
    """Extract the raw text of every field in ``FIELD_XPATHS`` from *html*.

    Returns:
        A list of lists, one per column, in ``FIELD_XPATHS`` order. The lists
        can differ in length when the page lacks a field for some rows.
    """
    tree = etree.HTML(html)
    if tree is None:
        # Guard: the parser may yield no tree when there is no parseable
        # document; report "nothing found" instead of failing on .xpath().
        return [[] for _ in FIELD_XPATHS]
    return [tree.xpath(path) for path in FIELD_XPATHS.values()]


def build_table(names, totals, release_info, combined, *, max_rows=None):
    """Assemble the scraped columns into a DataFrame.

    The table has as many rows as the longest input list, so no scraped row
    is dropped; shorter columns are padded with NaN.

    Args:
        names: movie names.
        totals: current total box office, one entry per movie.
        release_info: days-since-release text, one entry per movie.
        combined: combined box office, one entry per movie.
        max_rows: optional cap on the number of rows kept (from the top).
            ``None`` keeps every row.

    Returns:
        A DataFrame with the columns of ``FIELD_XPATHS``, indexed from 0.
    """
    values = (names, totals, release_info, combined)
    # dtype=object keeps the scraped strings untouched and gives empty
    # columns a well-defined dtype.
    table = pd.DataFrame({
        column: pd.Series(list(items), dtype=object)
        for column, items in zip(FIELD_XPATHS, values)
    })
    if max_rows is not None:
        table = table.head(max_rows)
    return table


def main():
    """Fetch the box-office page, print the parsed table and return it."""
    data = build_table(*parse_box_office(fetch_page()))
    print(data)
    return data


if __name__ == '__main__':
    main()